# -------------------------------------------------------------------
# Purpose: Creates Table B1
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate the table.
# -------------------------------------------------------------------
# Fail fast unless both project directories exist: the one the data is loaded
# from (pdataanalysis) and the one the table is written to (poutputappendix).
stopifnot(
  dir.exists(pdataanalysis),
  dir.exists(poutputappendix)
)


# Load data
# load() restores objects under the names they were saved with and returns
# those names. Check that the expected object is really among them, so that a
# stale `countyLevel19001940` left in the workspace by an earlier script can
# never be used silently if the file's contents change.
loaded_objects <- load(file.path(pdataanalysis, "countyLevel19001940.RData"))
stopifnot("countyLevel19001940" %in% loaded_objects)


# Create table
# Keep only the variables reported in Table B1 and rename them to their display
# labels (new name = old name). The labels become the row names of the LaTeX
# table, which is written with escape = FALSE, hence the math-mode `$<$`.
# select() is namespace-qualified so that another attached package exporting a
# `select` (e.g. MASS) cannot mask the dplyr verb; this also matches the
# `modelsummary::` style used for the table call.
temp <- dplyr::select(
  countyLevel19001940,
  # Main diversity and population measures
  `Surname diversity` = entropy_namelast_mp_adjp_w,
  `Predicted surname diversity` = iv_lo_entropy_namelast_mp_adjp_fe_immig_w,
  `Population` = sum_n_namelast_mp_adjp_w,
  `Predicted population` = iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_w,
  # Patent outcomes
  `Patents per 1,000 people` = sum_patents_pc_1900_f_w,
  `Breakthrough patents per 1,000 people` = sum_break_p80_rrfsim05_pc_1900_f_w,
  # Other diversity measures, segregation and family ties
  `Birth-country diversity` = entropy_cob_adjp_w,
  `Ancestral-country diversity` = entropy_coo_adjp_w,
  `Predicted ancestral-country diversity` = iv_lo_entropy_coo_adjp_fe_immig_w,
  `Racial diversity` = entropy_race_adjp_w,
  `Occupational diversity` = entropy_occ1950_adjp_w,
  `USPTO tech codes diversity` = entropy_uspto_techn_w,
  `Segregation of surname groups` = segregation_namelast_mp_adjp_w,
  `Segregation of ancestral-country groups` = segregation_coo_adjp_w,
  `Strength of family ties` = sft_pc1_w,
  # Distance-band versions of surname diversity
  `Surname diversity ($<$ 100 miles)` = entropy_namelast_mp_adjp_100m_w,
  `Predicted surname diversity ($<$ 100 miles)` = iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_w,
  `Surname diversity (100 $<$ 200 miles)` = entropy_namelast_mp_adjp_100_200m_w,
  `Predicted surname diversity (100 $<$ 200 miles)` = iv_lo_entropy_namelast_mp_adjp_fe_immig_100_200m_w,
  `Surname diversity (200 $<$ 300 miles)` = entropy_namelast_mp_adjp_200_300m_w,
  `Predicted surname diversity (200 $<$ 300 miles)` = iv_lo_entropy_namelast_mp_adjp_fe_immig_200_300m_w,
  # Distance-band versions of population
  `Population ($<$ 100 miles)` = sum_n_namelast_mp_adjp_100m_w,
  `Predicted population ($<$ 100 miles)` = iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_w,
  `Population (100 $<$ 200 miles)` = sum_n_namelast_mp_adjp_100_200m_w,
  `Predicted population (100 $<$ 200 miles)` = iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100_200m_w,
  `Population (200 $<$ 300 miles)` = sum_n_namelast_mp_adjp_200_300m_w,
  `Predicted population (200 $<$ 300 miles)` = iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_200_300m_w
)
# Destination of the LaTeX table
tablename <- file.path(poutputappendix, "tableB01.tex")

# Session-wide modelsummary option controlling how numbers are written in LaTeX
# output (see the modelsummary documentation for the "plain" setting).
# It is set globally and intentionally not restored here.
options("modelsummary_format_numeric_latex" = "plain")

# Build the formula  `<col 1>` + `<col 2>` + ... ~ N + Mean + SD + Min + Max
# from the columns of `temp`, in column order, instead of repeating every
# display label by hand. `temp` contains exactly the variables to report, so
# the table rows can no longer drift out of sync with the selection above.
# Language objects (not pasted strings) are used so any label is handled safely.
row_terms <- Reduce(
  function(lhs, rhs) call("+", lhs, rhs),
  lapply(names(temp), as.name)
)
summary_formula <- eval(bquote(.(row_terms) ~ N + Mean + SD + Min + Max))

# One row per variable; columns are N, Mean, SD, Min and Max.
# - align: one left-aligned label column plus five centred statistic columns
# - fmt:   `comma3` is defined outside this script (presumably a number
#          formatter -- confirm against its definition)
# - escape = FALSE keeps the LaTeX in the row labels (e.g. `$<$`) verbatim
modelsummary::datasummary(
  summary_formula,
  data = temp,
  output = tablename,
  align = "lccccc",
  fmt = comma3,
  escape = FALSE
)
# Post-process the .tex file written above using project helpers that are
# defined outside this script. Their return values are discarded, so they are
# presumably called for their side effect on the file at `tablename`.
# NOTE(review): get_summary_stats_rows() presumably trims the file down to the
# table rows -- confirm against the helper's definition.
get_summary_stats_rows(tablename)
# Each add_table_row() call passes: the file, a search string, a LaTeX line,
# and where = "before" (presumably: insert the line before the line matching
# the search string -- confirm). In the R literals below, "\\\\" is the LaTeX
# line break `\\` and "\\textbf" is `\textbf`.
# The calls run in sequence on the same file, so their order matters.

# Panel header above the first row. The search string has a capital "S" and a
# trailing " &", so it differs from the "Predicted surname diversity" label
# (assuming the helper matches case-sensitively -- confirm).
add_table_row(tablename,
  "Surname diversity &",
  "\\textbf{Surname Diversity and Population} \\\\",
  where = "before"
)
# Panel header above the patent rows (capital "P" distinguishes this label
# from "Breakthrough patents per 1,000 people").
add_table_row(tablename,
  "Patents per 1,000 people",
  "\\textbf{Patents and Breakthroughs} \\\\",
  where = "before"
)
# Panel header above the remaining diversity / segregation / family-ties rows;
# \multicolumn{2}{l} lets the long header span two columns.
add_table_row(tablename,
  "Birth-country diversity",
  "\\multicolumn{2}{l}{\\textbf{Other diversities, Segregation and Family Ties}} \\\\",
  where = "before"
)
# Panel header above the distance-band rows.
# NOTE(review): "100 miles" occurs in several row labels; this relies on the
# helper acting on the first match only -- confirm.
add_table_row(tablename,
  "100 miles",
  "\\textbf{Spillovers} \\\\",
  where = "before"
)
# Adds a bare LaTeX line break relative to the line containing "toprule".
add_table_row(tablename,
  "toprule",
  "\\\\",
  where = "before"
)
# Report where the table was written.
cat("Table B1 saved to:", tablename, "\n")